how does loop length correlate with SNR?
library(dplyr)
library(purrr)
library(readr)
library(stringr)
library(ggplot2)
library(viridis)
calc_gc_content <- function(sq) {
num_g <- str_count(sq, "G")
num_c <- str_count(sq, "C")
gc_content <- (num_g + num_c) / str_length(sq)
#print(gc_content)
return(gc_content)
}
file<-'imotif_array_v2.median_SNR.with.hnRNPA1.avg_reps.genes.loops.tab'
data<-read.csv(file, sep='\t')
prctcutoff<-10
data$length_loop1<-nchar(data$loop1)
data$length_loop2<-nchar(data$loop2)
data$length_loop3<-nchar(data$loop3)
for (i in 1:nrow(data)){
data$gc_loop1[i] <- calc_gc_content(data$loop1[i])
data$gc_loop2[i] <- calc_gc_content(data$loop2[i])
data$gc_loop3[i] <- calc_gc_content(data$loop3[i])
}
top 10% of hnRNPK binders fall into only three sequence classes:
experiments<-c("hnRNPK_rep", "hnRNPLL_rep", "ASF_rep")
x<-experiments[1]
inds<- which(data[,x] > quantile(data[,x],prob=1-prctcutoff/100))
tmp<-data
tmp$cutoff<-"NA"
tmp$cutoff[inds]<-"top10"
print("Sequence classses in top 10prctile of HNRPK binding")
## [1] "Sequence classses in top 10prctile of HNRPK binding"
table(data$SeqClass[inds])
##
## 3_Cs 4_Cs 5_10_Cs pos_control
## 27 230 923 2
library(ComplexHeatmap)
library(circlize)
groups <-c('2_Cs','3_Cs','4_Cs','5_10_Cs')
for(group in groups){
tmp2<-tmp[which(tmp$SeqClass==group),]
# bin lengths
tmp2$length_loop1[tmp2$length_loop1>9] <- paste0("\u2265","10")
tmp2$length_loop2[tmp2$length_loop2>9] <- paste0("\u2265","10")
tmp2$length_loop3[tmp2$length_loop3>9] <- paste0("\u2265","10")
tmp2$length_loop1<-factor(tmp2$length_loop1,levels=c(seq(1,9), paste0("\u2265","10")))
tmp2$length_loop2<-factor(tmp2$length_loop2,levels=c(seq(1,9), paste0("\u2265","10")))
tmp2$length_loop3<-factor(tmp2$length_loop3,levels=c(seq(1,9), paste0("\u2265","10")))
mat1<-tmp2[,c("length_loop1","length_loop2","length_loop3","iMab100nM_6.5_40","hnRNPK_rep", "hnRNPLL_rep", "ASF_rep","Mx2nM_6.5_40")]
# rename experiments
names(mat1)[4:ncol(mat1)] <- c('iMab','hnRNPK','hnRNPLL','ASF/SF1','Mitoxantrone')
plot_data<-tidyr::pivot_longer(mat1,cols=c(4:ncol(mat1)),
names_to =c("experiment"),
values_to="SNR")
plot_data<-tidyr::pivot_longer(plot_data,cols=c(1:3),
names_to =c("loop_number"),
values_to="loop_length")
p<-ggplot(plot_data, aes(x=loop_length,y=SNR,group=loop_length,color=loop_length)) +
geom_boxplot() +
theme_bw() +
facet_grid(cols=vars(loop_number),rows=vars(experiment),scale='free_y') +
ggtitle(group) +
# scale_color_viridis(option="magma", discrete = T)
scale_color_viridis( discrete = T)
print(p)
}
for(group in groups){
tmp2<-tmp[which(tmp$SeqClass==group),]
# round gc content
tmp2$gc_loop1 <- round(tmp2$gc_loop1,1)
tmp2$gc_loop2 <-round(tmp2$gc_loop2,1)
tmp2$gc_loop3<-round(tmp2$gc_loop3,1)
tmp2$gc_loop1<-factor(tmp2$gc_loop1,levels=c(seq(0,1,0.1)))
tmp2$gc_loop2<-factor(tmp2$gc_loop2,levels=c(seq(0,1,0.1)))
tmp2$gc_loop3<-factor(tmp2$gc_loop3,levels=c(seq(0,1,0.1)))
mat1<-tmp2[,c("gc_loop1","gc_loop2","gc_loop3","iMab100nM_6.5_40","hnRNPK_rep", "hnRNPLL_rep", "ASF_rep","Mx2nM_6.5_40")]
plot_data<-tidyr::pivot_longer(mat1,cols=c(4:ncol(mat1)),
names_to =c("experiment"),
values_to="SNR")
plot_data<-tidyr::pivot_longer(plot_data,cols=c(1:3),
names_to =c("loop_number"),
values_to="gc")
p<-ggplot(plot_data, aes(x=gc,y=SNR,group=gc,color=gc)) +
geom_boxplot() +
theme_bw() +
facet_grid(cols=vars(loop_number),rows=vars(experiment),scale='free_y') +
ggtitle(group)+
scale_color_viridis( discrete = T)
print(p)
}
groups <-c('2_Cs','3_Cs','4_Cs','5_10_Cs')
for(group in groups){
plot_data<-tidyr::pivot_longer(mat1,cols=c(4:ncol(mat1)),
names_to =c("experiment"),
values_to="SNR")
plot_data<-tidyr::pivot_longer(plot_data,cols=c(1:3),
names_to =c("loop_number"),
values_to="loop_length")
p<-ggplot(plot_data, aes(x=SNR,group=loop_length,color=loop_length)) +
geom_density()+
theme_bw() +
facet_grid(cols=vars(loop_number),rows=vars(experiment),scale='free') +
ggtitle(group) +
# scale_color_viridis(option="magma", discrete = T)
scale_color_viridis( discrete = T)
print(p)
}
for(group in groups){
plot_data<-tidyr::pivot_longer(mat1,cols=c(4:ncol(mat1)),
names_to =c("experiment"),
values_to="SNR")
plot_data<-tidyr::pivot_longer(plot_data,cols=c(1:3),
names_to =c("loop_number"),
values_to="gc")
p<-ggplot(plot_data, aes(x=SNR,group=gc,color=gc)) +
geom_density() +
theme_bw() +
facet_grid(cols=vars(loop_number),rows=vars(experiment),scale='free_y') +
ggtitle(group)+
scale_color_viridis( discrete = T)
print(p)
}
# mat1<-tmp2[,c(x,"length_loop1","length_loop2","length_loop3")]
# mat2<-tmp2[,c(x,"gc_loop1","gc_loop2","gc_loop3")]
# col_fun= colorRamp2(c(1,10),c("yellow","blue"))
# col_fun_gc = colorRamp2(c(0,0.5,1),c("#edf8b1","#7fcdbb","#2c7fb8"))
# mat1<-mat1[order(mat1[,1],decreasing=T),]
# mat2<-mat2[order(mat2[,1],decreasing=T),]
# ComplexHeatmap::Heatmap(mat1[,2:4],
# col=col_fun,
# #row_names_side = "left",
# cluster_rows=F,
# cluster_columns=F,
# show_row_names = F,
# column_title="3_Cs"
#
# )